Lesson 06 - AI Programming - Python Rating Prediction
Dr Tran Anh Tuan (Head of AI in VTC Academy)
# Mount drive
# Mount Google Drive inside Colab and sanity-check that the dataset folder
# and the playerStats.csv file are reachable before loading anything.
from google.colab import drive
drive.mount('/content/gdrive')
import os
# NOTE(review): the doubled slashes are tolerated by the OS path routines,
# but single slashes would be conventional.
path_Data = "//content//gdrive//MyDrive//VTCE AI TUAN//Rating Prediction//Game//"
checkPath = os.path.isdir(path_Data)
checkFile = os.path.isfile(path_Data + "playerStats.csv")
print("The path and file are valid or not :", checkPath, checkFile)
import pandas as pd
import numpy as np
# Load the player statistics CSV and preview the first 20 rows.
data = pd.read_csv(path_Data + "playerStats.csv")
data.head(20)
import matplotlib.pyplot as plt
import math
from IPython.core.debugger import Tracer;
# Average damage per round : ADR
# Keep only the ADR feature and the Rating target; Rating is scaled by 100
# so it lives on a range comparable to ADR for the plots and the line fit.
dataSet = data.loc[:,["ADR", "Rating"]]
dataSet['Rating'] = dataSet['Rating']*100
display(dataSet.head())
# Feature (X) and target (y) as 1-D numpy arrays.
X= np.array(dataSet['ADR'])
y = np.array(dataSet['Rating'])
# Visualise the raw relationship between ADR and (scaled) Rating.
plt.scatter(X,y)
plt.xlabel('ADR')
plt.ylabel('Rating')
plt.grid()
plt.show()
def SSE(m, b, data):
    """Sum of squared errors of the line y = m*x + b over `data`.

    Parameters
    ----------
    m, b : float
        Slope and intercept of the candidate line.
    data : numpy.ndarray of shape (n, 2)
        Column 0 holds x (ADR), column 1 holds y (Rating).

    Returns
    -------
    float
        Total squared error over the rows with complete data.

    Notes
    -----
    Rows containing NaN in *either* column are skipped. The original only
    checked column 0, so a NaN target propagated into the total; this now
    matches the NaN handling in ``gradient_descent_step``. The unused
    ``totalNan`` counter was dropped.
    """
    totalError = 0.0
    for i in range(data.shape[0]):
        x_i = data[i, 0]
        y_i = data[i, 1]
        # Skip incomplete rows; the raw CSV has missing values.
        if math.isnan(x_i) or math.isnan(y_i):
            continue
        totalError += (y_i - (m * x_i + b)) ** 2
    return totalError
# Arbitrary starting guess for the fit line: y = 3x + 2.
m = 3
b = 2
plt.scatter(X,y)
plt.plot(X,m*X+b,color='red')
plt.show()
# Plain (n, 2) numpy array: column 0 = ADR, column 1 = Rating.
data = np.array(dataSet)
data
data[1,0]
# Error of the initial guess.
sse = SSE(m,b,data)
print('For the fitting line: y = %sx + %s\nSSE: %.2f' %(m,b,sse))
def gradient_descent_step(m, b, data):
    """Perform one gradient-descent update of the line y = m*x + b.

    Parameters
    ----------
    m, b : float
        Current slope and intercept.
    data : numpy.ndarray of shape (n, 2)
        Column 0 holds x, column 1 holds y. Rows with NaN in either
        column are skipped so missing data cannot crash the update.

    Returns
    -------
    (float, float)
        The updated (m, b) after a single step.

    Notes
    -----
    The learning rate 0.0001 "smooths" the learning: it scales down each
    step to prevent overshooting (an extremely fast, uncontrolled update).
    """
    n_points = data.shape[0]  # size of data (NaN rows still count here)
    m_grad = 0
    b_grad = 0
    stepper = 0.0001  # this is the learning rate
    for i in range(n_points):
        # Get current pair (x, y)
        x = data[i, 0]
        y = data[i, 1]
        # `or` short-circuits; the original used bitwise `|`, which always
        # evaluates both operands (same result on bools, less idiomatic).
        if math.isnan(x) or math.isnan(y):
            continue
        # Partial derivative of the MSE with respect to 'm'
        dm = -((2 / n_points) * x * (y - (m * x + b)))
        # Partial derivative of the MSE with respect to 'b'
        db = -((2 / n_points) * (y - (m * x + b)))
        # Accumulate the gradient over all samples
        m_grad = m_grad + dm
        b_grad = b_grad + db
    # Step against the gradient, scaled by the learning rate.
    m_updated = m - stepper * m_grad
    b_updated = b - stepper * b_grad
    return m_updated, b_updated
# Re-initialise the line parameters and refine them with gradient descent.
m = 3
b = 2
print('Starting line: y = %.2fx + %.2f - Error: %.2f' %(m,b,sse))
# One update per iteration; the SSE should shrink as the line improves.
for i in range(50): #Should Try 10000
m,b = gradient_descent_step(m,b,data)
sse = SSE(m,b,data)
print('At step %d - Line: y = %.2fx + %.2f - Error: %.2f' %(i+1,m,b,sse))
print('\nBest line: y = %.2fx + %.2f - Error: %.2f' %(m,b,sse))
print ('m ', m)
print('b ', b)
# Fitted line (red) versus the initial guess y = 3x + 2 (green).
plt.scatter(X,y)
plt.plot(X,m*X+b,color='red')
plt.plot(X,3*X+2,color='green')
plt.show()
House Price Prediction
Sale Price and Living Area
# --- House Price Prediction: verify the dataset location on Drive ---
import os
path_Data = "//content//gdrive//MyDrive//VTCE AI TUAN//House Price Prediction//"
checkPath = os.path.isdir(path_Data)
checkFile = os.path.isfile(path_Data + "house-prices 2.csv")
print("The path and file are valid or not :", checkPath, checkFile)
#Load the libraries and data...
import matplotlib.pyplot as plt
import matplotlib.animation as animation
data = pd.read_csv(path_Data + "house-prices 2.csv")
display(data.head())
# Predict SalePrice from above-ground living area (GrLivArea).
x = data['GrLivArea']
y = data['SalePrice']
# Standardise the feature (zero mean, unit variance) so gradient descent
# converges with a single learning rate.
x = (x - x.mean()) / x.std()
# Prepend a column of ones so theta[0] acts as the intercept term.
x = np.c_[np.ones(x.shape[0]), x]
#GRADIENT DESCENT
# Hyper-parameters and seeded random initialisation of theta.
alpha = 0.01 #Step size
iterations = 2000 #No. of iterations
m = y.size #No. of data points
np.random.seed(123) #Set the seed, so the run is reproducible
theta = np.random.rand(2) #Pick some random values to start with
#GRADIENT DESCENT
def gradient_descent(x, y, theta, iterations, alpha):
    """Batch gradient descent for linear regression.

    Parameters
    ----------
    x : numpy.ndarray of shape (n, 2)
        Design matrix with a leading column of ones (intercept term).
    y : numpy.ndarray of shape (n,)
        Target values.
    theta : numpy.ndarray of shape (2,)
        Initial parameters [intercept, slope].
    iterations : int
        Number of update steps to run.
    alpha : float
        Learning rate.

    Returns
    -------
    (list[numpy.ndarray], list[float])
        ``past_thetas`` — theta after every step (initial value included),
        ``past_costs`` — the cost J(theta) evaluated before each step.
    """
    # Derive the sample count from the arguments. The original read the
    # module-level global `m`, which would silently give wrong gradients
    # if the function were called with differently-sized data.
    n_samples = y.size
    past_costs = []
    past_thetas = [theta]
    for _ in range(iterations):
        prediction = np.dot(x, theta)
        error = prediction - y
        # J(theta) = (1 / 2n) * sum(error^2)
        cost = 1 / (2 * n_samples) * np.dot(error.T, error)
        past_costs.append(cost)
        theta = theta - (alpha * (1 / n_samples) * np.dot(x.T, error))
        past_thetas.append(theta)
    return past_thetas, past_costs
#Pass the relevant variables to the function and get the new values back...
past_thetas, past_costs = gradient_descent(x, y, theta, iterations, alpha)
# Final parameters = last entry of the trajectory.
theta = past_thetas[-1]
#Print the results...
print("Gradient Descent: {:.2f}, {:.2f}".format(theta[0], theta[1]))
#Plot the cost function...
# The curve should drop steeply and flatten as theta converges.
plt.title('Cost Function J')
plt.xlabel('No. of iterations')
plt.ylabel('Cost')
plt.plot(past_costs)
plt.show()
#Animation
#Set the plot up,
fig = plt.figure()
ax = plt.axes()
plt.title('Sale Price vs Living Area')
plt.xlabel('Living Area in square feet (normalised)')
plt.ylabel('Sale Price ($)')
plt.scatter(x[:,1], y, color='red')
# Empty line artist that animate() updates frame by frame.
line, = ax.plot([], [], lw=2)
# Text artist showing the cost of the current frame.
annotation = ax.text(-1, 700000, '')
annotation.set_animated(True)
# Close the static figure; only the rendered animation is wanted.
plt.close()
#Generate the animation data,
def init():
    """Reset the animated artists to an empty state (blit base frame)."""
    annotation.set_text('')
    line.set_data([], [])
    return line, annotation
# animation function. This is called sequentially
def animate(i):
    """Draw frame *i*: the fitted line after step i plus its cost label."""
    intercept = past_thetas[i][0]
    slope = past_thetas[i][1]
    xs = np.linspace(-5, 20, 1000)
    line.set_data(xs, slope * xs + intercept)
    annotation.set_text('Cost = %.2f e10' % (past_costs[i]/10000000000))
    return line, annotation
# Build the animation (300 frames, one per recorded theta) and save it as a
# GIF; blit=True redraws only the artists returned by animate()/init().
anim = animation.FuncAnimation(fig, animate, init_func=init,
frames=300, interval=0, blit=True)
anim.save('animation.gif', writer='pillow', fps = 30)
#Display the animation...
import io
import base64
from IPython.display import HTML
filename = 'animation.gif'
# Read the GIF with a context manager so the handle is always closed
# (the original io.open(filename, 'r+b') was never closed). Plain 'rb'
# suffices since the file is only read, never written.
with open(filename, 'rb') as gif_file:
    video = gif_file.read()
# Inline the GIF as a base64 data URI so it renders in the notebook.
encoded = base64.b64encode(video)
HTML(data='''<img src="data:image/gif;base64,{0}" type="gif" />'''.format(encoded.decode('ascii')))